library(data.table)
library(tidyr)

#read the data (Wave 5)

# Data of Wave 5


# Read the Wave 5 data file (stored as a serialized R object).
WV5_data <- readRDS(
  "/Users/cristinacandido/Documents/Github/risk_wvs/data/WVS/F00007944-WV5_Data_R_v20180912.rds"
)

# Work on a plain data.frame copy of the imported object.
WV5_data_df <- as.data.frame(WV5_data)

# Quick sanity check: leading rows of the first five columns.
head(WV5_data_df[, seq_len(5)])

# clean the data set

library(dplyr)

# Wave 5 cleaning: rename the survey items, keep the variables of interest,
# attach readable country labels, then drop respondents without usable
# answers on the key variables.

# Rename the variables.
# V86 is named `risk_and_adventure` (not `risk`) so that it matches the name
# given to the corresponding Wave 6 item and the name the exclusion filters
# in this script refer to.
WV5_data <- WV5_data_df %>%
  rename(
    sex = V235, age = V237, country = V2, wave = V1,
    family_important = V4, friends_important = V5, leisure_time = V6,
    happiness = V10, health = V11, satisfaction = V22, freedom = V46,
    marital_status = V55, children = V56,
    creativity = V80, money = V81, security = V82, goodtime = V83,
    help = V84, success = V85, risk_and_adventure = V86, proper = V87,
    environment = V88, tradition = V89,
    employment = V241, education = V238
  )
WV5_data

# Select only the variables of interest.
WV5_data <- WV5_data %>%
  select(
    sex, age, country, wave,
    family_important, leisure_time, happiness, health, satisfaction,
    marital_status, children,
    creativity, money, security, goodtime, help, success,
    risk_and_adventure, proper, environment, tradition,
    employment, education
  )
WV5_data

# Decode the country names: look up each numeric country code in the
# code/name table and store the matching name in `country_lab`.
countrynames <- read.csv(
  "/Users/cristinacandido/Documents/Github/risk_wvs/data/WVS/countrynames.txt",
  header = FALSE, as.is = TRUE
)
colnames(countrynames) <- c("code", "name")
WV5_data$country_lab <- countrynames$name[match(WV5_data$country, countrynames$code)]
table(WV5_data$country_lab)

# Exclusion of participants with no info about risk, sex or age (non-positive
# codes). The result is written back to WV5_data so the exclusion actually
# takes effect; it is applied after the country table, mirroring the order
# used for Wave 6.
WV5_data <- subset(WV5_data, risk_and_adventure > 0 & sex > 0 & age > 0)

            Andorra           Argentina           Australia              Brazil            Bulgaria        Burkina Faso              Canada               Chile 
               1003                1002                1421                1500                1001                1534                2164                1000 
              China            Colombia          Cyprus (G)               Egypt            Ethiopia             Finland              France             Georgia 
               1991                3025                1050                3051                1500                1014                1001                1500 
            Germany               Ghana       Great Britain           Guatemala           Hong Kong             Hungary               India           Indonesia 
               2064                1534                1041                1000                1252                1007                2001                2015 
               Iran                Iraq               Italy               Japan              Jordan            Malaysia                Mali              Mexico 
               2667                2701                1012                1096                1200                1201                1534                1560 
            Moldova             Morocco         Netherlands         New Zealand              Norway                Peru              Poland             Romania 
               1046                1200                1050                 954                1025                1500                1000                1776 
             Russia              Rwanda            Slovenia        South Africa         South Korea               Spain              Sweden         Switzerland 
               2033                1507                1037                2988                1200                1200                1003                1241 
             Taiwan            Thailand Trinidad and Tobago              Turkey             Ukraine       United States             Uruguay            Viet Nam 
               1227                1534                1002                1346                1000                1249                1000                1495 
             Zambia 
               1500 
WV5_data
NA
NA

#Read Dataset (Wave 6)

# load() restores the saved object(s) into the workspace under their original
# names and returns only a character vector of those names, so it is called
# for its side effect here instead of being assigned to WV6_data.
load("/Users/cristinacandido/Documents/Github/risk_wvs/data/WVS/WV6_Data_R_v20201117.rdata")

# Give the restored Wave 6 object the working name used by the rest of the
# script.
WV6_data <- WV6_Data_R_v20201117
print(WV6_data)

#rename variables

# Rename the Wave 6 items to descriptive names, then keep only the variables
# of interest (a single pipeline instead of two separate assignments).
# NOTE(review): `education` is renamed but not selected below; confirm against
# the Wave 6 codebook that V237 is the intended item before using it.
WV6_data <- WV6_data %>%
  rename(
    wave = V1,
    risk_and_adventure = V76,
    sex = V240,
    age = V242,
    education = V237,
    country = V2
  ) %>%
  select(risk_and_adventure, sex, age, country, wave)
WV6_data
NA

# Decode dataset (Wave 6): translate numeric country codes into country names.

# Two-column lookup table without a header row: code, name.
countrynames <- read.csv(
  "/Users/cristinacandido/Documents/Github/risk_wvs/data/WVS/countrynames.txt",
  header = FALSE,
  as.is = TRUE
)
colnames(countrynames) <- c("code", "name")

# Position of each respondent's country code in the lookup table, then the
# name found at that position.
WV6_data$country_lab <- countrynames$name[match(WV6_data$country, countrynames$code)]

# Respondents per country.
table(WV6_data$country_lab)

            Algeria           Argentina             Armenia           Australia          Azerbaijan             Belarus              Brazil 
               1200                1030                1100                1477                1002                1535                1486 
              Chile               China            Colombia          Cyprus (G)             Ecuador               Egypt             Estonia 
               1000                2300                1512                1000                1202                1523                1533 
            Georgia             Germany               Ghana               Haiti           Hong Kong               India                Iraq 
               1202                2046                1552                1996                1000                4078                1200 
              Japan              Jordan          Kazakhstan              Kuwait          Kyrgyzstan             Lebanon               Libya 
               2443                1200                1500                1303                1500                1200                2131 
           Malaysia              Mexico             Morocco         Netherlands         New Zealand             Nigeria            Pakistan 
               1300                2000                1200                1902                 841                1759                1200 
          Palestine                Peru         Philippines              Poland               Qatar             Romania              Russia 
               1000                1210                1200                 966                1060                1503                2500 
             Rwanda           Singapore            Slovenia        South Africa         South Korea               Spain              Sweden 
               1527                1972                1069                3531                1200                1189                1206 
             Taiwan            Thailand Trinidad and Tobago             Tunisia              Turkey             Ukraine       United States 
               1238                1200                 999                1205                1605                1500                2232 
            Uruguay          Uzbekistan               Yemen            Zimbabwe 
               1000                1500                1000                1500 
WV6_data

# Exclude participants with no info about risk, sex, and age: keep only rows
# where all three values are positive (rows where the test is NA are dropped).
WV6_data <- WV6_data[
  which(with(WV6_data, risk_and_adventure > 0 & sex > 0 & age > 0)),
]

# Combine the two datasets (Wave 5 + Wave 6).
# The two waves are cleaned with different variable selections, and rbind()
# on data frames needs the same columns on both sides, so only the columns
# present in both waves are stacked (in the Wave 6 column order).
shared_cols <- intersect(names(WV6_data), names(WV5_data))

# Fail early, with a readable message, if a variable needed by the analysis
# below is missing from either wave.
stopifnot(
  all(c("risk_and_adventure", "sex", "age", "country_lab") %in% shared_cols)
)

data <- rbind(
  WV5_data[, shared_cols, drop = FALSE],
  WV6_data[, shared_cols, drop = FALSE]
)
data

# Number of distinct country labels in the combined data.
length(unique(data[["country_lab"]]))
[1] 80

# Number of participants (rows) in the combined data.
dim(data)[1L]
[1] 170195

# Exclusion of participants: keep rows whose risk, sex and age values are all
# positive (rows where the test is NA are dropped as well).
data <- data[which(with(data, risk_and_adventure > 0 & sex > 0 & age > 0)), ]
data
NA

# Number of males vs females (1 = males; 2 = females).
table(data[["sex"]])

    1     2 
75737 81963 

# Create a categorical age variable.
# findInterval() counts how many of the lower bounds 20, 30, ..., 80 are <= age
# (0 for ages under 20, 7 for ages 80 and over); adding 1 turns that count
# into an index into the label vector.
data$agecat <- c(
  "15-19", "20-29", "30-39", "40-49", "50-59", "60-69", "70-79", "80+"
)[findInterval(data$age, seq(20, 80, by = 10)) + 1]

# Gender variable: replace the numeric codes 1 and 2 with text labels.
# Both masks are computed before any label is written; inserting the first
# label turns the column into character, and any other code is kept as its
# text form.
data$sex <- local({
  recoded <- data$sex
  is_male <- recoded == 1
  is_female <- recoded == 2
  recoded[is_male] <- "male"
  recoded[is_female] <- "female"
  recoded
})

# Average age of participants.
mean(data[["age"]])
[1] 41.62714

# Age range (youngest and oldest participant).
range(data[["age"]])
[1]  15 102

# Risk-taking frequency: histogram of the risk_and_adventure responses.
library(ggplot2)

ggplot(data, aes(x = risk_and_adventure)) +
  geom_histogram(binwidth = 0.5, colour = "black", fill = "lightblue") +
  ggtitle("Histogram of Risk Taking") +
  xlab("Risk Taking") +
  ylab("Frequency") +
  theme_minimal()

# Age frequency: histogram of participant ages.
# Ages are whole years, so one bin per year is used; a narrower binwidth would
# leave every other bin empty.
ggplot(data, aes(x = age)) +
  geom_histogram(binwidth = 1, fill = "lightblue", color = "black") +
  labs(x = "Age", y = "Frequency", title = "Histogram of Age Distribution") +
  theme_minimal()

# Age vs risk taking: one box per age category.
ggplot(data, aes(x = agecat, y = risk_and_adventure)) +
  geom_boxplot() +
  ggtitle("Boxplot of Risk and Adventure by Age") +
  xlab("Age") +
  ylab("Risk and Adventure") +
  theme_minimal()

NA
NA

# Sex vs risk taking: one box per sex category.
ggplot(data, aes(x = as.factor(sex), y = risk_and_adventure)) +
  geom_boxplot()

# Show the final dataset, followed by a per-column summary.
print(data)
summary(data)
 risk_and_adventure     sex                 age            country           wave      
 Min.   :1.000      Length:157700      Min.   : 15.00   Min.   : 12.0   Min.   :5.000  
 1st Qu.:3.000      Class :character   1st Qu.: 28.00   1st Qu.:276.0   1st Qu.:5.000  
 Median :4.000      Mode  :character   Median : 39.00   Median :466.0   Median :6.000  
 Mean   :3.794                         Mean   : 41.63   Mean   :478.9   Mean   :5.547  
 3rd Qu.:5.000                         3rd Qu.: 54.00   3rd Qu.:710.0   3rd Qu.:6.000  
 Max.   :6.000                         Max.   :102.00   Max.   :894.0   Max.   :6.000  
 country_lab           agecat         
 Length:157700      Length:157700     
 Class :character   Class :character  
 Mode  :character   Mode  :character  
                                      
                                      
                                      

```

LS0tCnRpdGxlOiAiUiBOb3RlYm9vayIKb3V0cHV0OiBodG1sX25vdGVib29rCi0tLQoKYGBge3J9CmxpYnJhcnkoZGF0YS50YWJsZSkKbGlicmFyeSh0aWR5cikKYGBgCgojcmVhZCB0aGUgZGF0YSAoV2F2ZSA1KQpgYGB7cn0KIyBEYXRhIG9mIFdhdmUgNQoKCldWNV9kYXRhIDwtIHJlYWRSRFMoIi9Vc2Vycy9jcmlzdGluYWNhbmRpZG8vRG9jdW1lbnRzL0dpdGh1Yi9yaXNrX3d2cy9kYXRhL1dWUy9GMDAwMDc5NDQtV1Y1X0RhdGFfUl92MjAxODA5MTIucmRzIikKCgojIENvbnZlcnQgV1Y1X2RhdGEtb2JqZWN0IGluIGRhdGEuZnJhbWUgCldWNV9kYXRhX2RmIDwtIGFzLmRhdGEuZnJhbWUoV1Y1X2RhdGEpCgojIHNob3cgZmlyc3QgZml2ZSBjb2x1bW5zCmhlYWQoV1Y1X2RhdGFfZGZbLCAxOjVdKQpgYGAKCiMgY2xlYW4gdGhlIGRhdGEgc2V0CmBgYHtyfQpsaWJyYXJ5KGRwbHlyKQoKI3JlbmFtZSB0aGUgdmFyaWFibGVzCldWNV9kYXRhIDwtIFdWNV9kYXRhX2RmICU+JQogIHJlbmFtZShzZXggPSBWMjM1LCBhZ2UgPSBWMjM3LCBjb3VudHJ5ID0gVjIsIHdhdmUgPSBWMSwgZmFtaWx5X2ltcG9ydGFudCA9IFY0LCBmcmllbmRzX2ltcG9ydGFudCA9IFY1LCBsZWlzdXJlX3RpbWUgPSBWNiwgaGFwcGluZXNzID0gVjEwLCBoZWFsdGggPSBWMTEsIHNhdGlzZmFjdGlvbiA9IFYyMiwgZnJlZWRvbSA9IFY0NiwgbWFyaXRhbF9zdGF0dXMgPSBWNTUsIGNoaWxkcmVuID0gVjU2LCBjcmVhdGl2aXR5ID0gVjgwLCBtb25leSA9IFY4MSwgc2VjdXJpdHkgPSBWODIsIGdvb2R0aW1lID0gVjgzLCBoZWxwID0gVjg0LCBzdWNjZXNzID0gVjg1LCByaXNrID0gVjg2LCBwcm9wZXIgPSBWODcsIGVudmlyb25tZW50ID0gVjg4LCB0cmFkaXRpb24gPSBWODksIGVtcGxveW1lbnQgPSBWMjQxLCBlZHVjYXRpb24gPSBWMjM4LCkKV1Y1X2RhdGEKCgojc2VsZWN0IG9ubHkgdGhlIHZhcmlhYmxlcyBvZiBpbnRlcmVzdApXVjVfZGF0YSA8LSBXVjVfZGF0YSAlPiUKICBzZWxlY3Qoc2V4LCBhZ2UsIGNvdW50cnksIHdhdmUsIGZhbWlseV9pbXBvcnRhbnQsIGxlaXN1cmVfdGltZSwgaGFwcGluZXNzLCBoZWFsdGgsIHNhdGlzZmFjdGlvbiwgbWFyaXRhbF9zdGF0dXMsIGNoaWxkcmVuLCBjcmVhdGl2aXR5LCBtb25leSwgc2VjdXJpdHksIGdvb2R0aW1lLCBoZWxwLCBzdWNjZXNzLCByaXNrLCBwcm9wZXIsIGVudmlyb25tZW50LCB0cmFkaXRpb24sIGVtcGxveW1lbnQsIGVkdWNhdGlvbikKV1Y1X2RhdGEKYGBgCgpgYGB7cn0KI2V4bGN1c2lvbiBvZiBwYXJ0aWNpcGFudHMgd2l0aCBubyBpbmZvIGFib3V0IHJpc2ssIHNleCwgYWdlLCBlbXBsb3ltZW50LCBtZXJpdGFsIHN0YXR1cyBhbmQgY2hpbGRyZW4gCldWNV9kYXRhX2RmID0gc3Vic2V0KFdWNV9kYXRhLCByaXNrX2FuZF9hZHZlbnR1cmUgPiAwICYgc2V4ID4gMCAmIGFnZSA+MCkKYGBgCgpgYGB7cn0KI2RlY29kZSB0aGUgY291bnRyeSBuYW1lcyAKY291bnRyeW5hbWVzID0gcmVhZC5jc3YoIi9Vc2Vycy9j
cmlzdGluYWNhbmRpZG8vRG9jdW1lbnRzL0dpdGh1Yi9yaXNrX3d2cy9kYXRhL1dWUy9jb3VudHJ5bmFtZXMudHh0IiwgaGVhZGVyPUZBTFNFLGFzLmlzPVRSVUUpCmNvbG5hbWVzKGNvdW50cnluYW1lcykgPSBjKCJjb2RlIiwgIm5hbWUiKQpXVjVfZGF0YSRjb3VudHJ5X2xhYiA9IGNvdW50cnluYW1lcyRuYW1lIFttYXRjaChXVjVfZGF0YSRjb3VudHJ5LCBjb3VudHJ5bmFtZXMkY29kZSldCnRhYmxlKFdWNV9kYXRhJGNvdW50cnlfbGFiKQpXVjVfZGF0YQoKCmBgYAoKI1JlYWQgRGF0YXNldCAoV2F2ZSA2KQpgYGB7cn0KV1Y2X2RhdGEgPC0gbG9hZCgiL1VzZXJzL2NyaXN0aW5hY2FuZGlkby9Eb2N1bWVudHMvR2l0aHViL3Jpc2tfd3ZzL2RhdGEvV1ZTL1dWNl9EYXRhX1JfdjIwMjAxMTE3LnJkYXRhIikgCldWNl9kYXRhIDwtIFdWNl9EYXRhX1JfdjIwMjAxMTE3IApwcmludChXVjZfZGF0YSkKYGBgCmAKYGB7cn0KI3JlbmFtZSB2YXJpYWJsZXMKYGBge3J9CldWNl9kYXRhIDwtIFdWNl9kYXRhICU+JQogIHJlbmFtZSh3YXZlID0gVjEsIHJpc2tfYW5kX2FkdmVudHVyZSA9IFY3Niwgc2V4ID0gVjI0MCwgYWdlID0gVjI0MiwgZWR1Y2F0aW9uID0gVjIzNywgY291bnRyeSA9IFYyKQoKCiNzZWxlY3Qgb25seSB0aGUgdmFyaWFibGVzIG9mIGludGVyZXN0CldWNl9kYXRhIDwtIFdWNl9kYXRhICU+JQogIHNlbGVjdChyaXNrX2FuZF9hZHZlbnR1cmUsIHNleCwgYWdlLCBjb3VudHJ5LCB3YXZlKQpXVjZfZGF0YQoKYGBgCgoKI2RlY29kZSBkYXJhc2V0IChXYXZlIDYpCmBgYHtyfQpjb3VudHJ5bmFtZXMgPSByZWFkLmNzdigiL1VzZXJzL2NyaXN0aW5hY2FuZGlkby9Eb2N1bWVudHMvR2l0aHViL3Jpc2tfd3ZzL2RhdGEvV1ZTL2NvdW50cnluYW1lcy50eHQiLCBoZWFkZXI9RkFMU0UsYXMuaXM9VFJVRSkKY29sbmFtZXMoY291bnRyeW5hbWVzKSA9IGMoImNvZGUiLCAibmFtZSIpCldWNl9kYXRhJGNvdW50cnlfbGFiID0gY291bnRyeW5hbWVzJG5hbWUgW21hdGNoKFdWNl9kYXRhJGNvdW50cnksIGNvdW50cnluYW1lcyRjb2RlKV0KdGFibGUoV1Y2X2RhdGEkY291bnRyeV9sYWIpCldWNl9kYXRhCmBgYAoKI2V4Y2x1ZGUgcGFydGljaXBhbnRzIHdpdGggbm8gaW5mbyBhYm91dCByaXNrLCBzZXgsIGFuZCBhZ2UKYGBge3J9CldWNl9kYXRhID0gc3Vic2V0KFdWNl9kYXRhLCByaXNrX2FuZF9hZHZlbnR1cmUgPiAwICYgc2V4ID4gMCAmIGFnZSA+MCkKYGBgCgojY29tYmluZSB0aGUgMiBkYXRhc2V0IChXYXZlIDYgKyBXYXZlIDUpCmBgYHtyfQpkYXRhID0gcmJpbmQoV1Y1X2RhdGEsIFdWNl9kYXRhKQpkYXRhCmBgYAojbnVtYmVyIG9mIGNvdW50cmllcwpgYGB7cn0KbGVuZ3RoKHVuaXF1ZShkYXRhJGNvdW50cnlfbGFiKSkKYGBgCgojbnVtYmVyIG9mIHBhcnRpY2lwYW50cwpgYGB7cn0KbnJvdyhkYXRhKQpgYGAKI2V4Y2x1c2lvbiBvZiBwYXJ0aWNpcGFudHMKYGBge3J9CmRhdGEgPSBzdWJzZXQoZGF0YSwgcmlza19hbmRfYWR2ZW50dXJl
ID4gMCAmIHNleCA+IDAgJiBhZ2UgPiAwKQpkYXRhCgpgYGAKI251bWJlciBvZiBtYWxlcyB2cyBmZW1hbGVzICgxID0gbWFsZXM7IDIgPSBmZW1hbGVzKQpgYGB7cn0KdGFibGUoZGF0YSRzZXgpCmBgYAojY3JlYXRlIGEgY2F0ZWdvcmljYWwgYWdlIHZhcmlhYmxlCmBgYHtyfQpkYXRhJGFnZWNhdFtkYXRhJGFnZTwyMF09IjE1LTE5IgpkYXRhJGFnZWNhdFtkYXRhJGFnZT49MjAgJiBkYXRhJGFnZSA8MzBdID0gIjIwLTI5IgpkYXRhJGFnZWNhdFtkYXRhJGFnZT49MzAgJiBkYXRhJGFnZSA8NDBdID0gIjMwLTM5IgpkYXRhJGFnZWNhdFtkYXRhJGFnZT49NDAgJiBkYXRhJGFnZSA8NTBdID0gIjQwLTQ5IgpkYXRhJGFnZWNhdFtkYXRhJGFnZT49NTAgJiBkYXRhJGFnZSA8NjBdID0gIjUwLTU5IgpkYXRhJGFnZWNhdFtkYXRhJGFnZT49NjAgJiBkYXRhJGFnZSA8NzBdID0gIjYwLTY5IgpkYXRhJGFnZWNhdFtkYXRhJGFnZT49NzAgJiBkYXRhJGFnZSA8ODBdID0gIjcwLTc5IgpkYXRhJGFnZWNhdFtkYXRhJGFnZT49ODBdID0gIjgwKyIKYGBgCgoKI2dlbmRlciB2YXJpYWJsZXMKYGBge3J9CmRhdGEkc2V4W2RhdGEkc2V4ID09IDFdIDwtICJtYWxlIgpkYXRhJHNleFtkYXRhJHNleCA9PSAyXSA8LSAiZmVtYWxlIgpgYGAKCiNhdmVyYWdlIGFnZSBvZiBwYXJ0aWNpcGFudHMKYGBge3J9Cm1lYW4oZGF0YSRhZ2UpCmBgYAoKI2FnZSByYW5nZQpgYGB7cn0KcmFuZ2UoZGF0YSRhZ2UpIApgYGAKI3Jpc2sgdGFraW5nIEZyZXF1ZW5jeQpgYGB7cn0KbGlicmFyeShnZ3Bsb3QyKQpnZ3Bsb3QoZGF0YSwgYWVzKHggPSByaXNrX2FuZF9hZHZlbnR1cmUpKSArCiAgZ2VvbV9oaXN0b2dyYW0oYmlud2lkdGggPSAwLjUsIGZpbGwgPSAibGlnaHRibHVlIiwgY29sb3IgPSAiYmxhY2siKSArCiAgbGFicyh4ID0gIlJpc2sgVGFraW5nIiwgeSA9ICJGcmVxdWVuY3kiLCB0aXRsZSA9ICJIaXN0b2dyYW0gb2YgUmlzayBUYWtpbmciKSArCiAgdGhlbWVfbWluaW1hbCgpCmBgYAojYWdlIGZyZXF1ZW5jeQpgYGB7cn0KZ2dwbG90KGRhdGEsIGFlcyh4ID0gYWdlKSkgKwogIGdlb21faGlzdG9ncmFtKGJpbndpZHRoID0gMC41LCBmaWxsID0gImxpZ2h0Ymx1ZSIsIGNvbG9yID0gImJsYWNrIikgKwogIGxhYnMoeCA9ICJBZ2UiLCB5ID0gIkZyZXF1ZW5jeSIsIHRpdGxlID0gIkhpc3RvZ3JhbSBvZiBBZ2UgRGlzdHJpYnV0aW9ubiIpICsKICB0aGVtZV9taW5pbWFsKCkKYGBgCiNhZ2UgdnMgcmlzayB0YWtpbmcKYGBge3J9CgpnZ3Bsb3QoZGF0YSwgYWVzKHggPSBhZ2VjYXQsIHkgPSByaXNrX2FuZF9hZHZlbnR1cmUpKSArCiAgZ2VvbV9ib3hwbG90KCkgKwogIGxhYnModGl0bGUgPSAiQm94cGxvdCBvZiBSaXNrIGFuZCBBZHZlbnR1cmUgYnkgQWdlIiwKICAgICAgIHggPSAiQWdlIiwKICAgICAgIHkgPSAiUmlzayBhbmQgQWR2ZW50dXJlIikgKwogIHRoZW1lX21pbmltYWwoKQoKCmBgYAojc2V4IHZzIHJpc2sgdGFraW5nCmBgYHtyfQpnZ3Bsb3QoZGF0YSwgYWVz
KGFzLmZhY3RvcihzZXgpLCByaXNrX2FuZF9hZHZlbnR1cmUgKSkrCiAgZ2VvbV9ib3hwbG90KCkKCmBgYApgYGB7cn0KZGF0YQpgYGAKCmBgYHtyfQpzdW1tYXJ5KGRhdGEpCmBgYAoKYGBgCgoKCg==